use strict;
use warnings;
=pod
MacBook-Air-3:Candidates harry$ head SM1.tped | cut -d' ' -f-20
5 5:131867565:G:A 0 131867565 G G G G A G A G G G A G G G G G
5 5:131867702:A:G 0 131867702 A A A A A A A A G A A A A A A A
5 5:131867835:A:G 0 131867835 A A G A G A G A A A G G G A A A
5 5:131867926:G:A 0 131867926 G G G G G G G G G G G G G G G G
5 5:131868131:G:GGTTTT 0 131868131 G G G G G G G G GGTTTT G G G G G G G
gpart output
row	chr	start.index	end.index	start.rsID	end.rsID	start.bp	end.bp	blocksize	Name
1	5	1	10	5:131867565:G:A	5:131872865:A:C	131867565	131872865	10	inter-IRF1:IL5-part1
2	5	11	51	5:131873073:T:C	5:131890941:T:G	131873073	131890941	41	IL5-part1
3	5	52	92	5:131891209:G:C	5:131904757:G:A	131891209	131904757	41	IL5/RAD50-part1
4	5	93	114	5:131905060:A:T	5:131911425:G:A	131905060	131911425	22	RAD50-part1
BigLD output
row  chr start.index end.index start.rsID   end.rsID start.bp   end.bp
1  21           1        14 rs59504523  rs2822825 16040232 16042876
2  21          20        23  rs9808806 rs13052089 16043928 16044788
3  21          26        93  rs2822826  rs2822833 16045615 16057487
=cut

my $ped = $ARGV[0];
my $pop = $ARGV[1];
my $gpart = "$ped.bigLD.txt"; 
my %haps;
open(IN, "<$gpart") || die "$gpart $! \n";
<IN>;
my $gparCount = 0;
my %coords;
my %homos;
my %rs;
while(<IN>){
	my @data = split(/\s+/);
	unless($coords{"start"}){
		$coords{"chr"} = $data[1];
		$coords{"start"} = $data[6];
	}
	$coords{"end"} = $data[7];
		
	system "plink --file $ped --maf 0.05 --chr $data[1]  --from-bp $data[6]  --to-bp $data[7] --recode  --out temp > temp.log";
	my $name = "$data[1].$data[6].$data[7]";
	open(MAP, "<temp.map") || die "temp.map $! \n";
	while(<MAP>){
		my @data = split(/\s+/);
		$rs{$name} .= $data[1] . ";";
	}
	close(MAP);	
	open(PED, "<temp.ped") || die "temp.ped $! \n";
	while(<PED>){
		chomp();
		my @haps = split(/\s+/);
		my $i =6;
		my ($hap1, $hap2);
		while($i <$#haps){
			$hap1 .=$haps[$i];
			$i++;
			$hap2 .=$haps[$i];
			$i++;
		}
		#Check if haplotype contains any null alleles. If it does skip it	
		if($hap1 !~ m/0/){
			$haps{$name}{$hap1}++;
		}		
		if($hap2 !~ m/0/){
			$haps{$name}{$hap2}++;
		}
		if($hap1 eq $hap2){
			$homos{$name}{$hap1}++;
		}
	}
	close(PED);
	$gparCount++;		
}
close(IN);

my $popsize = `wc -l $ped.ped | awk '{print \$1}'`;
chomp($popsize);
unless( -e "All.HapsStats.txt"){
	open(OUT, ">All.HapsStats.txt");
	print(OUT "Gene\tLocus_ID\tCount Haplotype\tHapltype\tCountObservedHomozygotes\tCountExpectedHomozygotes\n");
	close(OUT);
}
open(OUT, ">>All.HapsStats.txt");

unless( -e "Summary.HapStats.txt"){
	open(OUT3, ">Summary.HapStats.txt");
	print(OUT3 "Gene\tChr\tStart\tEnd\tBlockStart\tBlockEnd\tPop\tCountHapsInGene\tCountDistinctHaps\tP_BothParenstsHets\tPopSize\tMeanHapLength(SNP)\trsIds\n");
	close(OUT3);
}
my $gene = substr($ped,0,index($ped,'.'));
open(OUT3, ">>Summary.HapStats.txt");
#open(OUT2, ">$ped.$pop.maxhap.counts");
#print(OUT2 "LocusID\tPop\tPop.size\tHap Length\tCount Distinct Haplotypes\tP_bothParenstsHets\n");
my (%lengths,%counts,$length, $count);
my $totalHaps;
my (%means);
foreach my $k (keys %haps){
	my $hapCount = keys %{$haps{$k}};
	$totalHaps += $hapCount;
	my ($coun,$len);
	my %freq;
	foreach my $hap (keys %{$haps{$k}}){
		$lengths{length($hap)} += $haps{$k}{$hap};
		$counts{$haps{$k}{$hap}}++;
		$length += length($hap) * $haps{$k}{$hap};
		$len += length($hap) * $haps{$k}{$hap};
		$count += $haps{$k}{$hap};
		$coun += $haps{$k}{$hap};
		unless($homos{$k}{$hap}){$homos{$k}{$hap}='0'}
		my $freq = ($haps{$k}{$hap}/ (2* $popsize)) * ($haps{$k}{$hap}/ (2*$popsize));
		my $freq2 = sprintf("%.3f",$freq * $popsize);
		print (OUT "$gene\t$k\t$haps{$k}{$hap}\t$hap\t$homos{$k}{$hap}\t$freq2\n");
		#get sum of squares of haplotype frequency
		$freq{$k} += $freq;
	}
	#Probablity of a heterozygote is 1 - square of probality of all homozygoytes
	my $pHet = sprintf("%.3f",(1 - $freq{$k}) * (1 - $freq{$k}));
	my $mean = sprintf("%.1f", ($len / $coun));
	#print(OUT2 "$k\t$pop\t$popsize\t$mean\t$hapCount\t$pHet\n");
	my @blocks = split(/\./,$k);
	print(OUT3 "$gene\t$coords{'chr'}\t$coords{'start'}\t$coords{'end'}\t$blocks[1]\t$blocks[2]\t$pop\t$gparCount\t$hapCount\t$pHet\t$popsize\t$mean\t$rs{$k}\n");
}
close(OUT);
close(OUT3);
